/************************************************************************
* utf8.c
* voxelands - 3d voxel world sandbox game
* Copyright (C) Lisa 'darkrose' Milne 2016 <lisa@ltmnet.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>
*
* Modified from original Public Domain sources by Jeff Bezanson
************************************************************************/

#include "common.h"

#include <string.h>

/*
 * correction values for decoded sequences, indexed by (sequence length - 1)
 *
 * the decoders in this file accumulate raw bytes with "ch <<= 6; ch += byte"
 * and never strip the marker bits of the lead byte or of the 10xxxxxx
 * continuation bytes. each entry is the sum of those marker bits at the
 * positions they end up in for that length, so one subtraction leaves the
 * code point, e.g. for two bytes: (0xC0 << 6) + 0x80 = 0x3080.
 * the arithmetic is modulo 2^32; the six byte entry relies on the lead
 * byte's marker bits being shifted out of the 32 bit accumulator.
 */
static const uint32_t utf8_offsets[6] = {
	0x00000000UL, 0x00003080UL, 0x000E2080UL,
	0x03C82080UL, 0xFA082080UL, 0x82082080UL
};

/*
 * number of bytes that follow a given first byte, indexed by that byte's
 * unsigned value (32 entries per row):
 *   0x00-0xBF -> 0   (single bytes; continuation bytes also map to 0)
 *   0xC0-0xDF -> 1
 *   0xE0-0xEF -> 2
 *   0xF0-0xF7 -> 3
 *   0xF8-0xFB -> 4
 *   0xFC-0xFF -> 5
 */
static const char utf8_trailing_bytes[256] = {
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};

/*
 * is c the start of a utf8 sequence?
 * true for every byte that is not a continuation byte (top two bits 10),
 * which includes plain ascii bytes and the terminating NUL
 */
#define isutf8(c) ( ((c) & 0xC0) != 0x80 )

/*
 * returns length of next utf-8 sequence
 *
 * only the first byte of str is examined: the result is one (for that
 * byte) plus the number of trailing bytes the lookup table lists for it
 */
int utf8_seqlen(char* str)
{
	unsigned char lead = (unsigned char)*str;
	int trailing = utf8_trailing_bytes[lead];

	return trailing + 1;
}

/*
 * reads the next utf-8 sequence out of a string, updating an index
 *
 * str: NUL terminated utf-8 text
 * i:   byte offset of the sequence to read; advanced past the bytes consumed
 *
 * returns the decoded value. the byte at *i is always consumed, followed by
 * any continuation bytes, up to the longest sequence utf8_offsets can
 * correct. when *i is on the terminating NUL, 0 is returned and *i is
 * stepped over the terminator; the byte after the terminator is not read.
 * no validation is done, malformed input yields an unspecified value.
 */
uint32_t utf8_nextchar(char* str, int *i)
{
	/* longest sequence the offset table has an entry for */
	const int max_seq = (int)(sizeof(utf8_offsets) / sizeof(utf8_offsets[0]));
	uint32_t ch = 0;
	int sz = 0;

	/* on the terminator: consuming it and then testing the following byte
	 * would read outside the string */
	if (!str[*i]) {
		(*i)++;
		return 0;
	}

	do {
		ch <<= 6;
		ch += (unsigned char)str[(*i)++];
		sz++;
		/* the sz bound keeps the table lookup below in range when the
		 * input has a long run of continuation bytes */
	} while (sz < max_seq && str[*i] && !isutf8(str[*i]));

	/* remove the lead/continuation marker bits added up in the loop */
	ch -= utf8_offsets[sz-1];

	return ch;
}

/*
 * char index to byte offset
 *
 * walks i characters forward from the start of str and returns the byte
 * offset reached, stopping early if the terminating NUL is hit first.
 * a character is taken to be one byte plus up to three continuation bytes.
 */
int utf8_offset(char* str, int i)
{
	int pos = 0;
	int len;

	for (; i > 0 && str[pos]; i--) {
		/* the byte at pos always counts; the next three bytes are
		 * included for as long as they are continuation bytes */
		len = 1;
		while (len < 4 && !isutf8(str[pos + len]))
			len++;
		pos += len;
	}

	return pos;
}

/*
 * byte offset to charindex
 *
 * counts the characters that start before byte offset o, stopping early
 * if the terminating NUL is hit first.
 * a character is taken to be one byte plus up to three continuation bytes.
 */
int utf8_charindex(char* str, int o)
{
	int chars = 0;
	int pos = 0;
	int len;

	for (; pos < o && str[pos]; chars++) {
		/* the byte at pos always counts; the next three bytes are
		 * included for as long as they are continuation bytes */
		len = 1;
		while (len < 4 && !isutf8(str[pos + len]))
			len++;
		pos += len;
	}

	return chars;
}

/*
 * number of characters
 *
 * returns how many sequences utf8_nextchar() reads between the start of
 * str and its terminating NUL
 */
int utf8_strlen(char* str)
{
	int count = 0;
	int i = 0;

	/* stop on the terminator byte itself instead of waiting for a decoded
	 * value of 0: the decoder is then never asked to read the terminator,
	 * and a sequence that happens to decode to 0 cannot end the count
	 * before the end of the string */
	while (str[i]) {
		utf8_nextchar(str, &i);
		count++;
	}

	return count;
}

/*
 * increment i by one character index
 *
 * moves *i forward over the byte it is on and over up to three
 * continuation bytes after it. the byte at *i is skipped without being
 * looked at, so a call with *i on the terminating NUL steps past it.
 */
void utf8_inc(char* str, int *i)
{
	int pos = *i;
	int len = 1;

	while (len < 4 && !isutf8(str[pos + len]))
		len++;

	*i = pos + len;
}

/*
 * decrement i by one character index
 *
 * moves *i backward until it is on a byte that is not a continuation
 * byte, by at most four bytes. there is no check against the start of
 * the string: the bytes before *i are read as they are.
 */
void utf8_dec(char* str, int *i)
{
	int pos = *i;
	int back = 1;

	/* the bytes 1, 2 and 3 back are tested; a fourth step back is taken
	 * untested when all three are continuation bytes */
	while (back < 4 && !isutf8(str[pos - back]))
		back++;

	*i = pos - back;
}

/*
 * strchr() for utf8
 *
 * str:   NUL terminated utf-8 text to search
 * ch:    decoded character value to look for
 * charn: optional (may be NULL); receives the character index of the
 *        match, or the number of characters examined when nothing matched
 *
 * returns a pointer to the first byte of the first matching character, or
 * NULL if the terminator is reached first. the terminator itself is never
 * compared against ch.
 */
char* utf8_strchr(char* str, uint32_t ch, int *charn)
{
	int next = 0;
	int start;

	if (charn != NULL)
		*charn = 0;

	/* start is where the current character begins, next is where the
	 * decoder left off */
	for (start = 0; str[start] != '\0'; start = next) {
		if (utf8_nextchar(str, &next) == ch)
			return str + start;
		if (charn != NULL)
			++*charn;
	}

	return NULL;
}

/*
 * memchr() for utf8
 *
 * str:   buffer to search; it does not have to be NUL terminated and NUL
 *        bytes inside it are decoded like any other byte
 * ch:    decoded character value to look for
 * sz:    size of the buffer in bytes, no byte at or beyond sz is read
 * charn: optional (may be NULL); receives the character index of the
 *        match, or the number of characters examined when nothing matched
 *
 * returns a pointer to the first byte of the first matching character, or
 * NULL if there is none within the first sz bytes.
 */
char* utf8_memchr(char* str, uint32_t ch, size_t sz, int *charn)
{
	/* longest sequence the offset table has an entry for */
	const int max_seq = (int)(sizeof(utf8_offsets) / sizeof(utf8_offsets[0]));
	/* byte positions are size_t like sz, so the comparisons below do not
	 * mix signed and unsigned and large buffers cannot overflow them */
	size_t i = 0;
	size_t lasti = 0;
	uint32_t c;
	int csz;

	if (charn)
		*charn = 0;

	while (i < sz) {
		c = 0;
		csz = 0;
		do {
			c <<= 6;
			c += (unsigned char)str[i++];
			csz++;
			/* the csz bound keeps the table lookup below in range
			 * when the buffer has a long run of continuation bytes */
		} while (i < sz && csz < max_seq && !isutf8(str[i]));

		/* remove the lead/continuation marker bits added up above */
		c -= utf8_offsets[csz-1];

		if (c == ch)
			return &str[lasti];
		lasti = i;
		if (charn)
			(*charn)++;
	}

	return NULL;
}
